library(tidyverse)
library(lubridate) # work with dates
library(magrittr) # more pipes
library(broom) # tidy regression outputs
library(fixest) # fixest::feols() is very fast with big data and fixed effects
library(patchwork) # for combining plots
library(ggthemes) # for ggthemes
library(knitr) # for making regression tables in Latex
shift <- data.table::shift # for lagging/leading variables

#### Merge expanded dictionary terms to analysis dataset ####

mp_z <- read_csv("data/analysis/MP_panelv2_climpolicyterms.csv") %>% 
  select(date, full_name, sum_ctweetz, sum_cspchz) %>% 
  right_join(., mp_df, by = c("full_name", "date"))


#### Direct effect of protest on tweets ####

## Run fixed effects regression models predicting tweets as a function of protest

## Hypothesis 1: Direct effect of protest on online speech
z_baseline1 <- feols(sum_ctweetz ~ fff_events_cumulative1 + sum_tweets |
              full_name + year_month,
            data = mp_z)
z_baseline2 <- feols(sum_ctweetz ~ fff_events_cumulative1 + sum_tweets + frontbench |
              full_name + year_month,
            data = mp_z)
z_baseline3 <- feols(sum_ctweetz ~ fff_events_cumulative1 + sum_tweets |
              full_name + year_week,
            data = mp_z)
z_baseline4 <- feols(sum_ctweetz ~ fff_events_cumulative1 + sum_tweets + frontbench |
              full_name + year_week,
            data = mp_z)

gm <- tibble::tribble(
  ~raw,        ~clean,          ~fmt,
  # "r.squared", "R2",            2,
  "nobs",      "Observations",             0,
  "FE: full_name",  "MP fixed effect",      0,
  "FE: year_month", "Year-month fixed effect", 0,
  "FE: year_week", "Year-week fixed effect", 0)

# Table 1
modelsummary(models = list(z_baseline1, z_baseline2, z_baseline3, z_baseline4), 
             output = "latex",
             coef_rename = c(
               "fff_events_cumulative1" = "FFF Protest$_{t:t-1}$",
               "sum_tweets" = "Tweets (daily sum)",
               "frontbench" = "Frontbench"),
             gof_map = gm, 
             notes = list('Outcome is count of climate tweets, expanded dictionary',
                          'Standard errors clustered by MP'),
             title = 'Direct effect of Fridays for Future protest on MPs’ tweets',
             stars = c("*" = 0.05, "**" = 0.01))

## Baseline effects table
z_table_baseline <- bind_rows(z_baseline1 %>% tidy() %>% mutate(model = 1),
                              z_baseline2 %>% tidy() %>% mutate(model = 2),
                              z_baseline3 %>% tidy() %>% mutate(model = 3),
                              z_baseline4 %>% tidy() %>% mutate(model = 4)) %>% 
  # Combine all model outputs
  set_colnames(c("term", "estimate", "std_error", "t_statistic", "p_value", "model")) %>% 
  # Retain only the protest variable
  filter(str_detect(term, "fff")) %>%
  # Denote specification/controls
  mutate(covariates = c("None", "Frontbench",
                        "None", "Frontbench")) %>% 
  # Extract model FEs
  mutate(fe1 = c(z_baseline1$fixef_vars[[1]],
                 z_baseline2$fixef_vars[[1]],
                 z_baseline3$fixef_vars[[1]],
                 z_baseline4$fixef_vars[[1]])) %>% 
  mutate(fe2 = c(z_baseline1$fixef_vars[[2]],
                 z_baseline2$fixef_vars[[2]],
                 z_baseline3$fixef_vars[[2]],
                 z_baseline4$fixef_vars[[2]])) %>% 
  # Extract model N
  mutate(nobs = c(z_baseline1$nobs,
                  z_baseline2$nobs,
                  z_baseline3$nobs,
                  z_baseline4$nobs)) %>%
  # Format numbers
  mutate_at(vars(estimate:std_error), ~format(round(., digits = 3), 
                                              nsmall = 3)) %>% 
  mutate_all(~as.character(.)) %>% 
  # Stack
  pivot_longer(names_to = "param", values_to = "value", 
               c(estimate:p_value, covariates:nobs)) %>% 
  # Paste parentheses around standard errors for table
  mutate(value = ifelse(param == "std_error", 
                        paste("(", value, ")", sep = ""), value)) %>% 
  # Clean up names and labels
  mutate(value = ifelse(param == "fe1" & value == "full_name", 
                        "MP", value)) %>%
  mutate(value = ifelse(param == "fe2" & value == "year_month", 
                        "Year, month", value)) %>% 
  mutate(value = ifelse(param == "fe2" & value == "year_week", 
                        "Year, week", value)) %>% 
  # Add statistical significance stars
  group_by(model) %>% 
  mutate(p_value = as.numeric(max(ifelse(param=="p_value", value, 0)))) %>% 
  mutate(value = ifelse(param == "estimate" & (p_value < 0.05 & p_value > 0.01),
                        paste(value, "*", sep = ""),
                        ifelse(param == "estimate" & (p_value < 0.01),
                               paste(value, "**", sep = ""),
                               value))) %>% 
  select(-p_value) %>% 
  ungroup() %>% 
  filter(param != "t_statistic",
         param != "p_value") %>% 
  # Spread to model columns
  pivot_wider(names_from = "model", values_from = "value", 
              names_prefix = "Z") %>% 
  mutate(term = case_when(param == "estimate" ~ "FFF Protest",
                          param == "std_error" ~ "",
                          param == "covariates" ~ "Covariates",
                          param == "fe1" ~ "Unit fixed effect",
                          param == "fe2" ~ "time fixed effect",
                          param == "nobs" ~ "Observations")) %>% 
  select(-param) %>% 
  # Format as Latex table
  knitr::kable(., 
               format = "latex",
               booktabs = T,
               linesep = "")
z_table_baseline

#### Evaluate tweets and speeches with different lag structures ####

# First, in different windows: "cumulative" codings of the protest variable
# sw() argument: evaluates each of the variables in the brackets individually, so creates that many model in a list
z_lags_tweets <- feols(sum_ctweetz ~ sum_tweets + 
                    sw(fff_events_cumulative_lead2, fff_events_cumulative_lead1,
                       fff_events_cumulative0, 
                       fff_events_cumulative1, fff_events_cumulative2, 
                       fff_events_cumulative3, fff_events_cumulative4,
                       fff_events_cumulative5) |
                    full_name + year_week,
                  data = mp_z)
# z_lags_tweets

## Collect the model outputs from each step [sw()/csw()] in the fixest::feols models

sw_steps <- seq_along(select(mp_z, starts_with("fff_events_cumulative")))
z_outs_lags_tweets <- as.data.frame(matrix(NA))

## Bind all the fixest_1 model outputs together using loops
for (step in sw_steps){
  z_int_lags_tweets <- tidy(z_lags_tweets[[step]]) %>%   
    filter(str_detect(term, "fff")) %>% 
    mutate(step_number = step)
  z_outs_lags_tweets <- bind_rows(z_outs_lags_tweets, z_int_lags_tweets)
}

# And tidy
z_outs_lags_tweets <- z_outs_lags_tweets %>% 
  select(-V1) %>% 
  slice(2:nrow(.)) %>% 
  set_colnames(c("term", "estimate", "std_error", "t_statistic", "p_value", "step_number")) %>% 
  mutate(days = as.integer(str_sub(term, start = -1L, end = -1L))) %>% 
  mutate(days = ifelse(str_detect(term, "lead"), -1*days, days)) %>% 
  mutate(model = "window")
# z_outs_lags_tweets


## Speech models 

## First, the most basic speech model to match the syntax of the Twitter models
z_lags_speeches <- feols(sum_cspchz ~ sum_spchs +
                    sw(fff_events_cumulative_lead2, fff_events_cumulative_lead1,
                       fff_events_cumulative0, 
                       fff_events_cumulative1, fff_events_cumulative2, 
                       fff_events_cumulative3, fff_events_cumulative4,
                       fff_events_cumulative5) |
                    full_name + year_week,
                  data = mp_z)
# z_lags_speeches

z_outs_lags_speeches <- as.data.frame(matrix(NA))

## Bind all the fixest_1 model outputs together
for (step in sw_steps){
  z_int_lags_speeches <- tidy(z_lags_speeches[[step]]) %>%   
    filter(str_detect(term, "fff")) %>% 
    mutate(step_number = step)
  z_outs_lags_speeches <- bind_rows(z_outs_lags_speeches, z_int_lags_speeches)
}

# And tidy
z_outs_lags_speeches <- z_outs_lags_speeches %>% 
  select(-V1) %>% 
  slice(2:nrow(.)) %>% 
  set_colnames(c("term", "estimate", "std_error", "t_statistic", "p_value", "step_number")) %>% 
  mutate(days = as.integer(str_sub(term, start = -1L, end = -1L))) %>% 
  mutate(days = ifelse(str_detect(term, "lead"), -1*days, days)) %>% 
  mutate(model = "window")
# z_outs_lags_speeches

## Plot coefficients for tweets and speeches side-by-side
z_figure_tweets_speeches <- rbind(z_outs_lags_tweets, z_outs_lags_speeches) %>% 
  mutate(ci_lower = estimate-1.96*std_error,
         ci_upper = estimate+1.96*std_error) %>% 
  mutate(outcome = rep(c("Tweets", "Speeches"), each = 8)) %>% 
  ggplot(., aes(days, estimate, color = outcome, group = outcome, shape = outcome)) +
  geom_hline(yintercept = 0, linetype = 2, color = "black") +
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
                 position = position_dodge(width = 0.5), size=0.75) +
  geom_point(aes(shape = outcome), 
             position = position_dodge(width = 0.5), size = 3) +
  scale_shape_manual(values = c(19,18)) +
  scale_x_continuous(breaks = seq(-2, 5, 1),
                     labels = c("-2\nW", "-1\nTh", 
                                "1\nFriday", "2\nF:Sa", 
                                "3\nF:Su", "4\nF:M", 
                                "5\nF:Tu",
                                "6\nF:W")) +
  scale_y_continuous(breaks = seq(-0.05, .30, .05)) +
  scale_color_manual(values=c("#79b321","#344d0e")) +
  labs(x = "Days in local FFF protest window", 
       colour = "Outcome (expanded dictionary)",
       shape = "Outcome (expanded dictionary)",
       y = "Estimate and 95% confidence interval") +
  theme_tufte(base_family = "Arial") +
  theme(legend.position = "bottom",
        # panel.background = element_rect(fill=NA),
        legend.text = element_text(size=15),
        legend.title = element_text(size=15),
        axis.title.x = element_text(size=15),
        axis.text.x = element_text(size=15),
        axis.title.y = element_text(size=15),
        axis.text.y = element_text(size=15),
        panel.border = element_rect(colour = "black", fill=NA, size=1),
        panel.grid.major = element_blank())
z_figure_tweets_speeches
ggsave("plots/figa5.png", width = 8, height = 6, dpi = 600)


#### Other structures for time ####

## Next, consider a set of models where time is treated differently
#' Want to be matching the opportunity structure that MPs have for speaking
#' Three models:
#' Daily-level; weekly-level; sitting days

# A first set of models as in the baseline model: tweets and speeches
z_daily_tweets <- feols(sum_ctweetz ~ sum_tweets + frontbench +
                           fff_events_cumulative1 |
                           full_name + year_week,
                         data = mp_z)
z_daily_speeches <- feols(sum_cspchz ~ sum_spchs + frontbench +
                          fff_events_cumulative1 |
                          full_name + year_week,
                        data = mp_z)


## Second, consider a model where the daily speech data is collapsed to the week level

mp_z_week <- mp_z %>%
  select(date, year_month, year_week, parliament_sitting,
         full_name, lab1_con0, frontbench,
         sum_tweets, sum_ctweetz,
         sum_spchs, sum_cspchz,
         fff_event) %>%
  # Create weeks starting on Friday to aggregate with
  mutate(week_friday = week(round_date(date, "week", week_start = 1)),
         year_week_friday = paste(year(date), week_friday, sep = "_")) %>%
  relocate(c(week_friday, year_week_friday), .after = year_week) %>% 
  group_by(full_name, year_week_friday) %>% 
  mutate(parliament_sitting_week = max(parliament_sitting, na.rm=T),
         week_sum_spchs = sum(sum_spchs, na.rm=T),
         week_sum_cspchz = sum(sum_cspchz, na.rm=T),
         week_sum_tweets = sum(sum_tweets, na.rm=T),
         week_sum_ctweetz = sum(sum_ctweetz, na.rm=T),
         week_fff_event = sum(fff_event),
         week_frontbench = max(frontbench, na.rm=T)) %>% 
  ungroup() %>% 
  # And aggregate to the weekly level
  select(year_month, year_week_friday, parliament_sitting_week, 
         full_name, lab1_con0, week_frontbench,
         starts_with("week")) %>% 
  ## NB. Should be about 85,000 observations, if not check that year_week and year_week_friday aren't both included with distinct()
  distinct() %>% 
  # And create a lagged protest variable
  group_by(full_name) %>% 
  mutate(week_fff_event_lag1 = shift(week_fff_event, type = "lag", n = 1)) %>% 
  # Remove rows with NAs in the treatment variable induced by lags/leads
  filter(!is.na(week_fff_event_lag1))

# Tweets at the week-level
z_weekly_tweets <- feols(week_sum_ctweetz ~ week_sum_tweets + week_frontbench + 
                           week_fff_event | # Don't need to lag the week because of how the weeks are defined
                           full_name + year_week_friday, 
                         data = mp_z_week)

# Speeches at the week-level
z_weekly_speeches <- feols(week_sum_cspchz ~ week_sum_spchs + week_frontbench +
                            week_fff_event |
                            full_name + year_week_friday,
                          data = mp_z_week)

## Third, consider the day to the next parliamentary sitting instead of the week

mp_z_sitting <- mp_z %>% 
  # Group by the cumulative number of days that parliament has been sitting since session began
  group_by(tally_sitting) %>% 
  # Calculate characteristics of that sitting window
  mutate(min_window_year = min(year),
         min_window_month = min(month),
         min_window_week = min(week),
         window_length = as.numeric(max(ymd(date)) - min(ymd(date)))) %>% 
  ungroup() %>%  
  # Calculate whether an MP spoke/tweeted during that interval
  group_by(full_name, tally_sitting) %>% 
  mutate(sum_tweets_window = sum(sum_tweets),
         sum_ctweetz_window = sum(sum_ctweetz),
         sum_spchs_window = sum(sum_spchs),
         sum_cspchz_window = sum(sum_cspchz)) %>% 
  # Calculate whether there were protests in that district during that interval
  mutate(sum_fff_events_window = sum(fff_event)) %>% 
  # Keep key variables
  select(date, parliament_sitting, tally_sitting,
         full_name, frontbench,
         starts_with("min_"), window_length, ends_with("_window")) %>% 
  ungroup() %>% 
  # Collapse data to the "parliament--sitting day period" --- a unit of opportunity
  distinct(tally_sitting, full_name, .keep_all=T) %>% 
  # Lag protest measure
  group_by(full_name) %>% 
  mutate(sum_fff_events_window_lag1 = shift(sum_fff_events_window, type = "lag", n = 1))

## Estimate the effect of climate protests on speaking in the next parliamentary sitting day

# Tweets at the sitting--day-level
z_sitting_tweets <- feols(sum_ctweetz_window ~ sum_tweets_window + 
                           frontbench + window_length +
                           sum_fff_events_window_lag1 |
                           full_name + min_window_year^min_window_week, # Combine year and week into a fixed effect
                         data = mp_z_sitting)

# Speech at the sitting--day-level
z_sitting_speeches <- feols(sum_cspchz_window ~ sum_spchs_window + 
                             frontbench + window_length +
                             sum_fff_events_window_lag1 |
                             full_name + min_window_year^min_window_week, # Combine year and week into a fixed effect
                           data = mp_z_sitting)

## Put these together in a table
z_table_speech <- bind_rows(z_daily_tweets %>% tidy() %>% mutate(model = 5),
                            z_daily_speeches %>% tidy() %>% mutate(model = 6),
                            z_weekly_tweets %>% tidy() %>% mutate(model = 7),
                            z_weekly_speeches %>% tidy() %>% mutate(model = 8),
                            z_sitting_tweets %>% tidy() %>% mutate(model = 9), 
                            z_sitting_speeches %>% tidy() %>% mutate(model = 10)) %>% 
  set_colnames(c("term", "estimate", "std_error", "t_statistic", "p_value", "model")) %>% 
  filter(str_detect(term, "fff")) %>%
  mutate(covariates = "Frontbench") %>% 
  mutate(outcome = rep(c("Tweets", "Speeches"), times = 3)) %>% 
  mutate(unit_observation = rep(c("Daily", "Weekly", "Sitting days"), 
                                each = 2)) %>% 
  mutate(fe1 = c(z_daily_tweets$fixef_vars[[1]],
                 z_daily_speeches$fixef_vars[[1]],
                 z_weekly_tweets$fixef_vars[[1]],
                 z_weekly_speeches$fixef_vars[[1]],
                 z_sitting_tweets$fixef_vars[[1]],
                 z_sitting_speeches$fixef_vars[[1]])) %>% 
  mutate(fe2 = c(z_daily_tweets$fixef_vars[[2]],
                 z_daily_speeches$fixef_vars[[2]],
                 z_weekly_tweets$fixef_vars[[2]],
                 z_weekly_speeches$fixef_vars[[2]],
                 z_sitting_tweets$fixef_vars[[2]],
                 z_sitting_speeches$fixef_vars[[2]])) %>% 
  mutate(nobs = c(z_daily_tweets$nobs,
                  z_daily_speeches$nobs,
                  z_weekly_tweets$nobs,
                  z_weekly_speeches$nobs,
                  z_sitting_tweets$nobs,
                  z_sitting_speeches$nobs)) %>% 
  mutate_at(vars(estimate:std_error), ~format(round(., digits = 3), 
                                              nsmall = 3)) %>% 
  mutate_all(~as.character(.)) %>% 
  pivot_longer(names_to = "param", values_to = "value", 
               c(estimate:p_value, covariates:nobs)) %>% 
  mutate(value = ifelse(param == "std_error", 
                        paste("(", value, ")", sep = ""), value)) %>% 
  mutate(value = ifelse(param == "fe1" & value == "full_name", 
                        "MP", value)) %>%
  mutate(value = ifelse(param == "fe2" & value == "year_week", 
                        "Year--week", value)) %>% 
  mutate(value = ifelse(param == "fe2" & value == "year_week_friday", 
                        "Year--week", value)) %>% 
  mutate(value = ifelse(param == "fe2" & value == "min_window_year^min_window_week", 
                        "Year--week", value)) %>% 
  group_by(model) %>% 
  mutate(p_value = as.numeric(max(ifelse(param=="p_value", value, 0)))) %>% 
  mutate(value = ifelse(param == "estimate" & (p_value < 0.05 & p_value > 0.01),
                        paste(value, "*", sep = ""),
                        ifelse(param == "estimate" & (p_value < 0.01),
                               paste(value, "**", sep = ""),
                               value))) %>% 
  select(-p_value) %>% 
  ungroup() %>% 
  filter(param != "t_statistic", 
         param != "p_value") %>% 
  mutate(term = "fff_protest") %>% 
  pivot_wider(names_from = "model", values_from = "value", 
              names_prefix = "Z") %>% 
  mutate(term = case_when(param == "estimate" ~ "FFF Protest",
                          param == "std_error" ~ "",
                          param == "covariates" ~ "Covariates",
                          param == "outcome" ~ "Outcome",
                          param == "unit_observation" ~ "Unit of observation",
                          param == "fe1" ~ "Unit fixed effect",
                          param == "fe2" ~ "Time fixed effect",
                          param == "nobs" ~ "Observations")) %>% 
  filter(term!="Outcome") %>% 
  select(-param) %>% 
  knitr::kable(., 
               format = "latex",
               booktabs = T,
               linesep = "")
z_table_speech
# --> Copy table_speech into Latex



## Heterogeneous effects models 

# Load in relevant MP-level covariates
mp_covariates <- read_csv("data/analysis/mp_hte_covariates.csv")

mp_z_hte <- left_join(mp_z, mp_covariates, by = "full_name")


## Heterogeneous effects by party: Labour/Conservative

z_hte_labcon_tweets <- feols(sum_ctweetz ~ sum_tweets + frontbench +
                             fff_events_cumulative1 * lab1_con0  |
                             full_name + year_week,
                           data = mp_z_hte)

z_hte_labcon_speeches <- feols(sum_cspchz ~ sum_spchs + frontbench +
                               fff_events_cumulative1 * lab1_con0  |
                               full_name + year_week,
                             data = mp_z_hte)

## Heterogeneous effects for the All-Parliament Group on Climate Change

z_hte_apg_tweets <- feols(sum_ctweetz ~ sum_tweets + frontbench +
                          fff_events_cumulative1 * apg_climate_190731 | 
                          full_name + year_week,
                        data = mp_z_hte)

z_hte_apg_speeches <- feols(sum_cspchz ~ sum_spchs + frontbench +
                            fff_events_cumulative1 * apg_climate_190731 | 
                            full_name + year_week,
                          data = mp_z_hte)

## Heterogeneous effects for the Conservative Environment Network MPs v. Tories

z_hte_cen_tweets <- feols(sum_ctweetz ~ sum_tweets + frontbench +
                          fff_events_cumulative1 * cen_caucus_191213  |
                          full_name + year_week,
                        data = mp_z_hte %>% filter(party_value == "Conservative"))

z_hte_cen_speeches <- feols(sum_cspchz ~ sum_spchs + frontbench +
                            fff_events_cumulative1 * cen_caucus_191213  |
                            full_name + year_week,
                          data = mp_z_hte %>% filter(party_value == "Conservative"))

## Heterogeneous effects for the Net Zero Scrutiny Group v. Tories

z_hte_nzsg_tweets <- feols(sum_ctweetz ~ sum_tweets + frontbench +
                           fff_events_cumulative1 * netzero_scrutiny |
                           full_name + year_week,
                         data = mp_z_hte %>% filter(party_value == "Conservative"))

z_hte_nzsg_speeches <- feols(sum_cspchz ~ sum_spchs + frontbench +
                             fff_events_cumulative1 * netzero_scrutiny |
                             full_name + year_week,
                           data = mp_z_hte %>% filter(party_value == "Conservative"))

## Frontbench

z_hte_frontbench_tweets <- feols(sum_ctweetz ~ sum_tweets + 
                                 frontbench * fff_events_cumulative1  |
                                 full_name + year_week,
                               data = filter(mp_z_hte, !is.na(lab1_con0)))
z_hte_frontbench_speeches <- feols(sum_cspchz ~ sum_spchs + 
                                   frontbench * fff_events_cumulative1  |
                                   full_name + year_week,
                                 data = filter(mp_z_hte, !is.na(lab1_con0)))

## Combine HTE tweets models into table
z_table_hte_tweets <- bind_rows(z_hte_apg_tweets %>% tidy() %>% mutate(model = 1),
                                z_hte_labcon_tweets %>% tidy() %>% mutate(model = 2),
                                z_hte_cen_tweets %>% tidy() %>% mutate(model = 3),
                                z_hte_nzsg_tweets %>% tidy() %>% mutate(model = 4),
                                z_hte_frontbench_tweets %>% tidy() %>% mutate(model = 5)) %>%
  set_colnames(c("term", "estimate", "std_error", "t_statistic", "p_value", "model")) %>%
  filter(str_detect(term, "fff")) %>%
  mutate_at(vars(estimate:std_error), ~format(round(., digits = 3),
                                              nsmall = 3)) %>%
  mutate_all(~as.character(.)) %>%
  pivot_longer(names_to = "param", values_to = "value",
               estimate:p_value) %>%
  mutate(value = ifelse(param == "std_error",
                        paste("(", value, ")", sep = ""), value)) %>%
  group_by(model, term) %>% 
  mutate(p_value = as.numeric(max(ifelse(param=="p_value", value, 0)))) %>% 
  mutate(value = ifelse(param == "estimate" & (p_value < 0.05 & p_value > 0.01),
                        paste(value, "*", sep = ""),
                        ifelse(param == "estimate" & (p_value < 0.01),
                               paste(value, "**", sep = ""),
                               value))) %>% 
  select(-p_value) %>% 
  ungroup() %>% 
  filter(param != "t_statistic",
         param != "p_value") %>%
  pivot_wider(names_from = "model", values_from = "value",
              names_prefix = "H") %>%
  add_row(term = "Fixed effect", param = "fe1",
          H1 = "MP",
          H2 = "MP",
          H3 = "MP",
          H4 = "MP",
          H5 = "MP") %>%
  add_row(term = "Fixed effect", param = "fe2",
          H1 = "Year--week",
          H2 = "Year--week",
          H3 = "Year--week",
          H4 = "Year--week",
          H5 = "Year--week") %>%
  add_row(term = "nobs", param = "nobs",
          H1 = as.character(z_hte_apg_tweets$nobs),
          H2 = as.character(z_hte_labcon_tweets$nobs),
          H3 = as.character(z_hte_cen_tweets$nobs),
          H4 = as.character(z_hte_nzsg_tweets$nobs),
          H5 = as.character(z_hte_frontbench_tweets$nobs)) %>%
  mutate(term = case_when(term == "fff_events_cumulative1" ~ "FFF",
                          term == "fff_events_cumulative1:lab1_con0" ~ "FFF times Labour",
                          term == "fff_events_cumulative1:cen_caucus_191213" ~ "FFF times CEN",
                          term == "fff_events_cumulative1:netzero_scrutiny" ~ "FFF times NZSG",
                          term == "fff_events_cumulative1:apg_climate_190731" ~ "FFF times APG",
                          term == "frontbench:fff_events_cumulative1" ~ "FFF times Frontbench",
                          param == "fe1" ~ "Unit fixed effect",
                          param == "fe2" ~ "Time fixed effect",
                          param == "nobs" ~ "Observations")) %>%
  mutate(term = ifelse(param == "std_error", "", term)) %>% 
  select(-param) %>% 
  # select(-param, `APP-H1` = H1, `APP-H2` = H2, `APP-H3` = H3, `APP-H4` = H4) %>% 
  mutate_at(vars(2:6), ~ifelse(is.na(.), "", .)) %>% 
  add_row(term = "Covariates", 
          H1 = "Yes",
          H2 = "Yes",
          H3 = "Yes",
          H4 = "Yes",
          H5 = "Yes") %>%
  {. ->> z_int_hte_tweets } %>%
  # filter(term!="") %>% # Remove the standard error term
  rbind(., c("Outcome", rep("Tweets", times = 5))) %>% 
  knitr::kable(.,
               format = "simple",
               booktabs = T,
               linesep = "")
# z_table_hte_tweets

## Combine HTE speech models into table
z_table_hte_speeches <- bind_rows(z_hte_apg_speeches %>% tidy() %>% mutate(model = 6),
                                  z_hte_labcon_speeches %>% tidy() %>% mutate(model = 7),
                                  z_hte_cen_speeches %>% tidy() %>% mutate(model = 8),
                                  z_hte_nzsg_speeches %>% tidy() %>% mutate(model = 9),
                                  z_hte_frontbench_speeches %>% tidy() %>% mutate(model = 10)) %>%
  set_colnames(c("term", "estimate", "std_error", "t_statistic", "p_value", "model")) %>%
  filter(str_detect(term, "fff")) %>%
  mutate_at(vars(estimate:std_error), ~format(round(., digits = 3),
                                              nsmall = 3)) %>%
  mutate_all(~as.character(.)) %>%
  pivot_longer(names_to = "param", values_to = "value",
               estimate:p_value) %>%
  mutate(value = ifelse(param == "std_error",
                        paste("(", value, ")", sep = ""), value)) %>%
  group_by(model, term) %>% 
  mutate(p_value = as.numeric(max(ifelse(param=="p_value", value, 0)))) %>% 
  mutate(value = ifelse(param == "estimate" & (p_value < 0.05 & p_value > 0.01),
                        paste(value, "*", sep = ""),
                        ifelse(param == "estimate" & (p_value < 0.01),
                               paste(value, "**", sep = ""),
                               value))) %>% 
  select(-p_value) %>% 
  ungroup() %>% 
  filter(param != "t_statistic",
         param != "p_value") %>%
  pivot_wider(names_from = "model", values_from = "value",
              names_prefix = "H") %>%
  add_row(term = "Fixed effect", param = "fe1",
          H6 = "MP",
          H7 = "MP",
          H8 = "MP",
          H9 = "MP",
          H10 = "MP") %>%
  add_row(term = "Fixed effect", param = "fe2",
          H6 = "Year--week",
          H7 = "Year--week",
          H8 = "Year--week",
          H9 = "Year--week",
          H10 = "Year--week") %>%
  add_row(term = "nobs", param = "nobs",
          H6 = as.character(z_hte_apg_speeches$nobs),
          H7 = as.character(z_hte_labcon_speeches$nobs),
          H8 = as.character(z_hte_cen_speeches$nobs),
          H9 = as.character(z_hte_nzsg_speeches$nobs),
          H10 = as.character(z_hte_frontbench_speeches$nobs)) %>%
  mutate(term = case_when(term == "fff_events_cumulative1" ~ "FFF",
                          term == "fff_events_cumulative1:lab1_con0" ~ "FFF times Labour",
                          term == "fff_events_cumulative1:cen_caucus_191213" ~ "FFF times CEN",
                          term == "fff_events_cumulative1:netzero_scrutiny" ~ "FFF times NZSG",
                          term == "fff_events_cumulative1:apg_climate_190731" ~ "FFF times APG",
                          term == "frontbench:fff_events_cumulative1" ~ "FFF times Frontbench",
                          param == "fe1" ~ "Unit fixed effect",
                          param == "fe2" ~ "Time fixed effect",
                          param == "nobs" ~ "Observations")) %>%
  mutate(term = ifelse(param == "std_error", "", term)) %>% 
  select(-param) %>% 
  mutate_at(vars(2:6), ~ifelse(is.na(.), "", .)) %>% 
  add_row(term = "Covariates", 
          H6 = "Yes",
          H7 = "Yes",
          H8 = "Yes",
          H9 = "Yes",
          H10 = "Yes") %>%
  {. ->> z_int_hte_speeches } %>%
  # filter(term!="") %>% # Remove the standard error term
  rbind(., c("Outcome", rep("Speeches", times = 5))) %>% 
  knitr::kable(.,
               format = "simple",
               booktabs = T,
               linesep = "")
# z_table_hte_speeches

## Combining tweets and speeches in one table
z_table_hte_combined <- cbind(z_int_hte_tweets[,1:2], z_int_hte_speeches[,2],
                              z_int_hte_tweets[,3], z_int_hte_speeches[,3],
                              z_int_hte_tweets[,4], z_int_hte_speeches[,4],
                              z_int_hte_tweets[,5], z_int_hte_speeches[,5],
                              z_int_hte_tweets[,6], z_int_hte_speeches[,6]) %>% 
  rbind(., c("Outcome", rep(c("Tweets", "Speeches"), times = 5))) %>% 
  set_colnames(value = c("term", paste("Z", seq(from = 13, to = 22, by = 1), sep = ""))) %>% 
  knitr::kable(.,
               format = "latex",
               booktabs = T,
               linesep = "")
z_table_hte_combined




#### End. 